This markdown provides comparison between two meQTL analysis after LD clumping. meQTL analysis were done using the R package MatrixEQTL. LD clumping was done using PLINK
For all analysis, the same DNAm data were used which underwent following QC steps:
DNAm QC
The imputed QC SNP data (no LD pruning, the MHC regions are included) were used. The data includes 3’957’338 SNPs.
All meQTLs passed the significance threshold of FDR = 0.05.
LD clumping was done per CpG using the following parameters:
–clump-p1 = 0.05: Significance threshold for index SNPs
–clump-p2 = 1: Secondary significance threshold for clumped SNPs
–clump-r2 = 0.2: LD threshold for clumping
–clump-kb = 200: Physical distance threshold for clumping
x %>%
ggplot(aes(y = cnt, x = Var2, fill = as.factor(as.numeric(Var2)))) + geom_col() + geom_text(aes(label = comma(cnt,
accuracy = 1L), y = cnt), stat = "identity", vjust = -0.2, size = 2.5) + scale_y_continuous(labels = scientific) +
# scale_fill_viridis(discrete = TRUE, alpha = 0.9, option = 'C') +
labs(title = " ", y = "Count", x = " ") + facet_wrap(~Var1, ncol = 3) + theme(legend.position = "none",
legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 10), axis.title = element_text(size = 8), axis.title.x = element_blank()) +
scale_fill_manual(values = cbPalette[c(1, 2)])
fdr.thr <- 0.05
plot.title <- paste0("Significant at FDR <= ", fdr.thr, " meQTLs.", "\nResult of MatrixEQTL")
GetScatterPlot2(meqtl.all.full.df, plot.title = plot.title, cbPalette = cbPalette)
# Load genomic locations
cpg.loc <- fread("/binder/mgp/workspace/2020_DexStim_Array_Human/dex-stim-human-array/data/integrative/matrixEQTL/cpg_locations.csv",
select = c("CpG_ID", "chr", "pos"))
meqtl.delta.notclumped.fn <- "/binder/mgp/workspace/2020_DexStim_Array_Human/dex-stim-human-array/output/data/integrative/matrixEQTL/meqtls/with_missingness/me-qtl_cis_result_snps_with_na_delta_beta_fdr_005.csv"
meqtl.delta.notclumped.df <- fread(meqtl.delta.notclumped.fn, col.names = col.names)
meqtl.loc.df <- left_join(meqtl.delta.notclumped.df, cpg.loc)
GetManhattanPlot(meqtl.loc.df)
# paste0('Scatter plot for the independent significant at FDR <= ', fdr.thr, ' meQTLs')
intersect.cpgs <- intersect(meqtl.all.full.df[treatment == "delta", CpG_ID], meqtl.all.full.df[treatment ==
"baseline", CpG_ID])
meqtl.id <- meqtl.all.full.df[treatment == "delta"][!(CpG_ID %in% intersect.cpgs)][fdr == min(fdr), meQTL_ID]
selected.meqtl <- meqtl.all.full.df[meQTL_ID %in% meqtl.id] # beta == max(beta)
kable(selected.meqtl %>%
dplyr::select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
mutate_if(is.numeric, funs(as.character(signif(., 3)))))
| SNP | CpG_ID | FC | t-stat | p-value | FDR | Treatment |
|---|---|---|---|---|---|---|
| rs9616713 | cg08880347 | 0.0271 | 8.74 | 1.13e-15 | 1.04e-09 | delta |
ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl[1], fdr.thr = 0.05,
plot.title = plot.title)
meqtl.id <- meqtl.all.full.df[treatment == "delta"][fdr == min(fdr), meQTL_ID]
selected.meqtl <- meqtl.all.full.df[meQTL_ID %in% meqtl.id] # beta == max(beta)
kable(selected.meqtl %>%
dplyr::select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
mutate_if(is.numeric, funs(as.character(signif(., 3)))))
| SNP | CpG_ID | FC | t-stat | p-value | FDR | Treatment |
|---|---|---|---|---|---|---|
| rs12310416 | cg07195891 | -0.154 | -19.5 | 9.69e-47 | 4.48e-42 | baseline |
| rs12310416 | cg07195891 | 0.0718 | 16.7 | 2.35e-39 | 3.21e-30 | delta |
ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl[1], fdr.thr = 0.05,
plot.title = plot.title)
euler.tbl <- euler(meqtls.meqtls)
# For visualisation purposes:
euler.tbl$ellipses["delta", c("a", "b")] <- euler.tbl$ellipses["delta", c("a", "b")] * 5
plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = alpha(cbPalette[c(2, 1)], 0.5),
col = alpha(cbPalette[c(2, 1)], 1), labels = c("GR-response", "Baseline"))
euler.tbl <- euler(meqtls.snps)
# For visualisation purposes:
euler.tbl$ellipses["delta", c("a", "b")] <- euler.tbl$ellipses["delta", c("a", "b")] * 5
plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = alpha(cbPalette[c(2, 1)], 0.5),
col = alpha(cbPalette[c(2, 1)], 1), labels = c("GR-response", "Baseline"))
euler.tbl <- euler(meqtls.cpgs)
# For visualisation purposes:
euler.tbl$ellipses["delta", c("a", "b")] <- euler.tbl$ellipses["delta", c("a", "b")] * 5
plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = alpha(cbPalette[c(2, 1)], 0.5),
col = alpha(cbPalette[c(2, 1)], 1), labels = c("GR-response", "Baseline"))
ggplot(bins.df, aes(DistanceInterval)) + geom_bar(position = "identity", fill = cbPalette[2], colour = cbPalette[2],
alpha = 0.5) + stat_count(geom = "text", aes(label = comma(..count.., accuracy = 1L)), position = position_dodge(1),
vjust = -1, colour = "black", cex = 3) + theme(legend.position = c(0.1, 0.9), legend.title = element_blank(),
panel.grid.major = element_blank(), panel.background = element_blank(), axis.text.y = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 0.8, size = 10, color = "black"), plot.title = element_text(size = 8),
axis.title = element_text(size = 14)) + labs(title = paste0("Distribution of GR-response cis-meQTLs, FDR = ",
fdr.thr), x = "Distance Interval, bp", y = "No. cis-meQTLs") + scale_fill_manual(values = cbPalette[2])
ggplot(bins.df, aes(DistanceInterval)) + geom_bar(position = "identity", fill = cbPalette[1], colour = cbPalette[1],
alpha = 0.5) + stat_count(geom = "text", aes(label = label_number(scale = 0.001, suffix = "K", accuracy = 1)(..count..)),
position = position_dodge(1), vjust = -1, colour = "black", cex = 3) + theme(legend.position = c(0.1,
0.9), legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
axis.text.y = element_blank(), axis.text.x = element_text(angle = 45, vjust = 0.8, size = 10, color = "black"),
plot.title = element_text(size = 8), axis.title = element_text(size = 14)) + labs(title = paste0("Distribution of baseline cis-meQTLs, FDR = ",
fdr.thr), x = "Distance Interval, bp", y = "No. cis-meQTLs") + scale_fill_manual(values = cbPalette[2])
ggplot(meqtl.delta.dist.df, aes(x = dist, y = -log10(fdr))) + geom_point(colour = cbPalette[2]) + theme(legend.position = c(0.1,
0.9), legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 10), axis.title = element_text(size = 8)) + labs(title = "Delta meQTL p-values plotted against distance from the CpG
nearest sites in a +/- 1Mb window relative to each CpG site for each delta meQTL SNP.",
x = "Distance from SNP, bp", y = "-log10 FDR")
ggplot(meqtl.veh.dist.df, aes(x = dist, y = -log10(fdr))) + geom_point(colour = cbPalette[1]) + theme(legend.position = c(0.1,
0.9), legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 10), axis.title = element_text(size = 8)) + labs(title = "Delta meQTL p-values plotted against distance from the CpG
nearest sites in a +/- 1Mb window relative to each CpG site for each delta meQTL SNP.",
x = "Distance from SNP, bp", y = "-log10 FDR")
ggplot(meqtl.delta.dist.df, aes(x = dist, y = beta)) + geom_point(colour = cbPalette[2]) + theme(legend.position = c(0.1,
0.9), legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 10), axis.title = element_text(size = 8)) + labs(title = "Delta meQTL effect size plotted against distance from the CpG
nearest sites in a +/- 1Mb window relative to each CpG site for each delta meQTL SNP.",
x = "Distance from SNP, bp", y = "Effect size")
ggplot(meqtl.veh.dist.df, aes(x = dist, y = beta)) + geom_point(colour = cbPalette[1]) + theme(legend.position = c(0.1,
0.9), legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
plot.title = element_text(size = 10), axis.title = element_text(size = 8)) + labs(title = "Delta meQTL effect size plotted against distance from the CpG
nearest sites in a +/- 1Mb window relative to each CpG site for each delta meQTL SNP.",
x = "Distance from SNP, bp", y = "Effect size")